Packages and Setup

options(width = 150)
## General
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## <U+2713> ggplot2 3.2.1     <U+2713> purrr   0.3.3
## <U+2713> tibble  2.1.3     <U+2713> dplyr   0.8.3
## <U+2713> tidyr   1.0.0     <U+2713> stringr 1.4.0
## <U+2713> readr   1.3.1     <U+2713> forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(effsize)
## Warning: package 'effsize' was built under R version 3.6.2
library(lm.beta)
## Graphing
library(ggplot2)
theme_set(theme_bw())
## Tables
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## Reading in data
library(readxl)
library(haven)  ## for reading sas file
## Microbiome functions
library(phyloseq)

## load up needed functions
source("microbiome_statistics_and_functions.R")

# Global options
# cutoffs for microbiome views
r.cut <- .5
p.cut <- .05

Read in Data and Fix Coding

#cfrip_data <- read.table('FaucherMA_CFRIP_MappingFileTemplate_AlphaDiv_ASA24_DHQ_HEI_2019_04_02.csv', header=T, sep=",")
cfrip_data <- haven::read_stata('Micro_Diet_Mapping_NoDup.dta')

#cfrip_data$SubjectID <- cfrip_data$`SourceID(Subject ID)`

## Fixing some variable codings
# Next, check the normal vs. obese identifiers.
unique(cfrip_data$BMI_Class)
## [1] 3 0 1 2
cfrip_data$BMI_Class[cfrip_data$BMI_Class == 1] <- 'Class 1'
cfrip_data$BMI_Class[cfrip_data$BMI_Class == 2] <- 'Class 2-3'
cfrip_data$BMI_Class[cfrip_data$BMI_Class == 3] <- 'Class 2-3'
cfrip_data$BMI_Class[cfrip_data$BMI_Class == 0] <- 'Normal'
cfrip_data$BMI_Class <- factor(cfrip_data$BMI_Class, levels = c("Normal", "Class 1", "Class 2-3"), ordered = T)
unique(cfrip_data$BMI_Class)
## [1] Class 2-3 Normal    Class 1  
## Levels: Normal < Class 1 < Class 2-3
#Next, check on the GWG categories variable.
#Also, make the name of this variable smaller...
unique(cfrip_data$GWG_above_below_within_IOM)
## [1] 1 3 2
cfrip_data$GWG_Cat <- factor(
  cfrip_data$GWG_above_below_within_IOM, levels = c(2,3,1),
  labels = c('Below', 'Within', 'Above'), ordered = T
) ## End factorizing this variable
table(cfrip_data$GWG_Cat)
## 
##  Below Within  Above 
##     25     30     47
##Fixing the missing values code for GWG total
summary(cfrip_data$Total_GWG)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -3.00   11.00   20.00   19.23   28.50   44.00
cfrip_data$Total_GWG <- ifelse(cfrip_data$Total_GWG == 999, NA, cfrip_data$Total_GWG )
summary(cfrip_data$Total_GWG)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -3.00   11.00   20.00   19.23   28.50   44.00
## SAS Exported HEI scores
#hei_scores <- read_sas('heistuff.sas7bdat')
# HEI scores are in the last 26 variables (229:255)

hei_scores <- readr::read_csv('data_DHQ_clean_HEI2010.csv')
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   RespondentID = col_character(),
##   GWG_cat = col_character()
## )
## See spec(...) for full column specifications.
# HEI scores are in the last 26 variables (230:242)

# need to map to timepoint 1
d <- filter(cfrip_data, SampleSource == 'S-01')
d <- d %>% arrange(desc(RespondentID))
d$RespondentID <- as.character(d$RespondentID) 
# save column names of d
dnames <- colnames(d)
hei_scores <- hei_scores %>% arrange(desc(RespondentID))
j <- ncol(d)
k <- ncol(hei_scores) ## number of columns to add to d
d <- cbind(d, matrix(nrow=nrow(d), ncol=k))
i <- 1
for(i in 1:nrow(d)){
  id <- d[i, 'RespondentID']
  id <- str_sub(id, 1, 7)
  print(id)
  if(id == "") break
  d[i, (j+1):(j+k)] <- hei_scores[ grep(id, hei_scores$RespondentID) , ]
}
## [1] "PREG023"
## [1] "PREG022"
## [1] "PREG021"
## [1] "PREG020"
## [1] "PREG019"
## [1] "PREG016"
## [1] "PREG015"
## [1] "PREG013"
## [1] "PREG012"
## [1] "PREG011"
## [1] "PREG010"
## [1] "PREG009"
## [1] "PREG008"
## [1] "PREG006"
## [1] "PREG005"
## [1] "PREG004"
## [1] "PREG001"
## [1] ""
colnames(d) <- c(dnames, colnames(hei_scores))
hei_scores <- d ## resave as someone more easily recognizable


## loop through names and make sure names are unique
hei_scores <- repair_names(hei_scores)

Now I need to subset the CFRIP data that contains all the data into two subsets:

  1. For the first time point
  2. For the second time point

Once I have these two sets of data, I can create two dataset to run the exploratory analyses on. For each of these subsets, the data will need to be for the stool samples only.

## subset to only Stool (S) and first timepoint (01)
ffq_t1 <- filter(cfrip_data, SampleSource == 'S-01', RecallNo == 1 )
## subset to only Stool (S) and second timepoint (02) 
ffq_t2 <- filter(cfrip_data, SampleSource == "S-02", RecallNo == 2)

Main Tasks to Complete

  1. Table – Diet Quality (FFQ) Across Categories of Pre-pregnancy BMI status
  2. Table - Diet Quality (FFQ) Across Categories of GWG among Women with BMI>=30
  3. Heat Map - Correlation between nutrients (ASA24) and distal gut microbiota
  4. Box Plots – Relative abundance of taxon by level of dietary variable, by category of GWG.

Dietary factors:

  1. HEI total (HEI2010_TOTAL_SCORE)
  2. Total Veg (HEIX1_TOTALVEG)
  3. Green beans (HEIX2_GREEN_AND_BEAN)
  4. Total fruit (HEIX3_TOTALFRUIT)
  5. Whole fruit (HEIX4_WHOLEFRUIT)
  6. Whole grains (HEIX5_WHOLEGRAIN)
  7. Dairy (HEIX6_TOTALDAIRY)
  8. Total protein (HEIX7_TOTPROT)
  9. Seafood (HEIX8_SEAPLANT_PROT)
  10. Proteins (How is this different tha 8. ?)
  11. Fatty Acids (HEIX9_FATTYACID)
  12. Sodium (HEIX10_SODIUM)
  13. Refined Grains (HEIX11_REFINEDGRAIN)
  14. SOFAAS (HEIX12_SOFAAS)

Analyses to for tasks 1-2

Need to run ANOVAs for

  1. BMI Class (Normal, Class I, Class II/III) 2. GWG Categories (below , within, above)

In the output, there will be the Total mean of the dietary factor, the mean by the three categories, the F-statistic, P-value, \(\omega^2, \eta^2\). For each mean, I will include the SE (for +/-).

Analyses for task 3

Screening for BMI/GWG correlations with gut microbiome.\

For these analyses, I have two major sets of analyses. First, is by Food Frequency Questionnaires (FFQ) (Slide 16, Step 1A: FFQ/DHQII)

  1. Create matrix of gut microbiome and dietary variables from FFQ (DHQII)

  2. Remove all taxa (genus) that have <2% relative abundance in any sample of the gut microbiome

  3. For each taxon identify the median by sample time point

  4. For each measure of alpha diversity identify the median by time point

  5. For each dietary (all listed) variable

    1. Conduct 2-tailed t-test on dietary variables above vs below the median of each taxon
    2. Conduct spearman correlation between dietary variables and taxon
    3. Conduct 2-tailed t-test on dietary variables above vs below the median of each measure of alpha diversity
    4. Conduct spearman correlation between dietary variables and each measure of alpha diversity
  6. List all t-test results that have p<0.1 AND correlation R2 >0.5

  7. Make heat map of the spearman correlation results meeting the above criteria colored by strength of correlation and *’d if p<0.05 (see example)

Second, is by ASA24 survey of food.

  1. Create matrix of gut microbiome and dietary variables matched by time point (i.e. gut sample 1 AND ASA24 RecallNo 1)

  2. Remove all taxa (genus) that have <2% relative abundance in any sample of the gut microbiome

  3. For each taxon identify the median by sample time point

  4. For each measure of alpha diversity identify the median by time point

  5. For each dietary (all listed) variable

    1. Conduct 2-tailed t-test on dietary variables above vs below the median of each taxon
    2. Conduct spearman correlation between dietary variables and taxon
    3. Conduct 2-tailed t-test on dietary variables above vs below the median of each measure of alpha diversity
    4. Conduct spearman correlation between dietary variables and each measure of alpha diversity
  6. List all t-test results that have p<0.1 AND correlation R2 >0.5

  7. Make heat map of the spearman correlation results meeting the above criteria colored by strength of correlation and satrred’d if p<0.05 (see example)

ANOVA Analyses

First, I need to create the function that will be used across the Dietary factors and sets of data.

## STandard error calc.
se <- function(x, na.rm=FALSE) {
  if (na.rm) x <- na.omit(x)
  sqrt(var(x)/length(x))
}

## Estimating omega**2
o2 <- function(ss, sst, df, mse){
  (ss - df*mse)/(sst + mse)
}
omega2 <- function(fit.sum){
  fit.sum <- fit.sum[[1]]
  k <- nrow(fit.sum) - 1
  ms_error <- fit.sum$`Mean Sq`[k+1]
  p.omega <- matrix(NA,ncol=1,nrow=(k))
  dfs <- fit.sum$Df
  sss <- fit.sum$`Sum Sq`
  SST <- sum(fit.sum$`Sum Sq`)
  for(i in 1:(k))
  {
    p.omega[i,] <- round(o2(sss[i], SST, dfs[i], ms_error),4)
  }
  rownames(p.omega) <- rownames(fit.sum)[1:(k)]
  colnames(p.omega) <- "omega2"
  return(p.omega)
}

# Estimating eta**2 (r**2)
e2 <- function(ss,sst){
  (ss)/(sst)
}
eta2 <- function(fit.sum){
  fit.sum <- fit.sum[[1]]
  k <- nrow(fit.sum) - 1
  eta <- matrix(NA,ncol=1,nrow=(k))
  sss <- fit.sum$`Sum Sq`
  SST <- sum(sss)
  for(i in 1:(k))
  {
    eta[i,] <- round(e2(sss[i],SST),4)
  }
  rownames(eta) <- rownames(fit.sum)[1:(k)]
  colnames(eta) <- "eta2"
  return(eta)
}


anova_assumptions_check <- function(dat, outcome, factors, stats = NULL){
  cat('\n ============================= \n')
  cat('\n Tests and Plots of Normality:\n')
  # Assess normality
  aov.out = aov(
    as.formula(paste(outcome, '~',
               paste(factors,collapse = "*"))), 
    data = dat)
  #par(mfrow = c(2, 2))
  plot(aov.out)
  #dev.off()
  # shapiro-wilks test
  if(length(aov.out$residuals) > 5000){ 
    res <- sample(aov.out$residuals, 5000)
  } else res <- aov.out$residuals
  cat('\n Shapiro-Wilks Test of Normality of Residuals:\n')
  print(shapiro.test(res))
  # K-S Test 
  cat('\n K-S Test for Normality of Residuals:\n')
  print(ks.test(aov.out$residuals, 'pnorm', 
          alternative = 'two.sided'))
  cat('\n')

  # boxplots
  print(
    ggplot(dat, 
           aes_string(
             y=outcome,x=factors[1],
             color = factors[1])
           ) +
      geom_boxplot(outlier.alpha=0) +
      geom_point(position = 'jitter') +
      labs(title = paste(outcome, 'distribution by', factors[1])) +
      scale_color_brewer(palette="Dark2") +
      theme_bw()
  ) ## End Print
  
  cat('\n ============================= \n')
  cat('\n Tests of Homogeneity of Variance\n')
  # Varainces
  cat('\n \n Levenes Test: ', factors[1], '\n \n \n')
  print(
    car::leveneTest(
      as.formula(paste(outcome, '~',factors[1])),
             data = dat,
             center="mean")
  ) ## End Print
}

## ffq_anova: function to run anovas for ffq variables
# data = dataset that contains all the variables needed
# v.n = variable.name of the outcome (dietary factor) of interest
# gorup = grouping variable for ANOVA
ffq_anova <- function(data, v.n, group){
  # data <- ffq_t1
  # v.n <- "HEI2010_TOTAL_SCORE"
  # group <- 'BMI_Class'
  
  if(anyNA(data[,group])){
    cat('\nOne or more cases were removed due to missing value on the grouping variable.\n\n')
    data <- data[is.na(data[,group]) == F, ]
  }
  
  
  out <- numeric()
  ## First calculate summary statistics
  m <- numeric(2)
  m[1] <- mean(data[,v.n, drop=T], na.rm = T) ## mean
  m[2] <- se(data[,v.n, drop=T], na.rm = T) ## se
  names(m) <- c('Total', 'SE Total')
  
  ## Start to get the by group means
  summ <- paste0('mean(', v.n, ', na.rm=T)')  # construct summary method, e.g. mean(mpg)
  summ.name <- paste0('mean_', v.n)  # construct summary variable name, e.g. mean_mpg
  ## Calc summary stats by group.
  m.g <- data %>%
    group_by_(group) %>%
    summarise_(.dots = setNames(summ, summ.name))
  mg <- m.g[,2, drop = T]
  names(mg) <- m.g[,1, drop = T]
  
  summ <- paste0('se(', v.n, ', na.rm=T)')  # construct summary method, e.g. mean(mpg)
  summ.name <- paste0('se_', v.n)  # construct summary variable name, e.g. mean_mpg
  ## Calc summary stats by group.
  mse.g <- data %>%
    group_by_(group) %>%
    summarise_(.dots = setNames(summ, summ.name))
  mseg <- mse.g[,2, drop = T]
  names(mseg) <- paste0('SE ',mse.g[,1, drop = T])
  # reorder mg and mseg so that they line up
  i <- j <- 1
  k <- length(mg)
  mg.out <- n <- numeric(k*2)
  while(i <= k){
    mg.out[j] <- mg[i]
      n[j] <- names(mg)[i]
    mg.out[j+1] <- mseg[i]
      n[j+1] <- names(mseg)[i]
    i <- i +1
    j <- j + 2
  }
  names(mg.out) <- n
  ## Now run one-way ANOVA Assumption check
  anova_assumptions_check(data, v.n, group)
  
  ## Run One-Way ANOVA
  f.out <- numeric(5)
  aov.sum <- summary(aov(as.formula(paste0(v.n, "~", group)), data = data))
  f.out[1:2] <- aov.sum[[1]]$Df
  f.out[3] <- aov.sum[[1]]$`F value`[1]
  f.out[4] <- aov.sum[[1]]$`Pr(>F)`[1]
  o.out <- omega2(aov.sum)
  f.out[5] <- ifelse(o.out < 0, 0, o.out)
  f.out[6] <- eta2(aov.sum)
  names(f.out) <- c('df Effect', 'df Error', "f-value", "p-value", 'omega2', 'eta2')
  
  
  ## Output matrix
  out <- c(m, mg.out, f.out)
  return(out)
}

Running ANOVAs for Previously calc. HEI Scores

Time Point 1:RecallNum 1 - BMI Categories

metrics <- c('HEI2010_TOTAL_SCORE', 'HEIX1_TOTALVEG', 'HEIX2_GREEN_AND_BEAN', 'HEIX3_TOTALFRUIT', 'HEIX4_WHOLEFRUIT', 'HEIX5_WHOLEGRAIN', 'HEIX6_TOTALDAIRY', 'HEIX7_TOTPROT', 'HEIX8_SEAPLANT_PROT', 'HEIX9_FATTYACID', 'HEIX10_SODIUM', 'HEIX11_REFINEDGRAIN', 'HEIX12_SOFAAS')


group.var <- c('BMI_Class', 'GWG_Cat')

## Data Subset
## subset to only Stool (S) and first timepoint (01)
ffq_t1 <- filter(cfrip_data, SampleSource == 'S-01', RecallNo == 1 )


## Initialize results matrix
results.bmi <- as.data.frame(matrix(NA, nrow=length(metrics), ncol=15))

iter<- 1
for(met in metrics){
  results.bmi[iter,1] <- met
  res.out <- ffq_anova(data=ffq_t1, v.n=met, group= group.var[1])
  results.bmi[iter,2:15] <- res.out
  
  iter <- iter + 1
}
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.95652, p-value = 0.5361
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.49767, p-value = 0.0001209
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.7153  0.505
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93, p-value = 0.1942
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.17896, p-value = 0.5522
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.5013 0.6155
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.68676, p-value = 5.626e-05
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.46434, p-value = 0.0008512
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.5524 0.5869
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.91678, p-value = 0.1134
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.37609, p-value = 0.01229
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.1084  0.898
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.87386, p-value = 0.02061
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.42914, p-value = 0.002641
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  2.9118 0.08541 .
##       15                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93114, p-value = 0.2034
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.35478, p-value = 0.02153
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.4263  0.271
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93822, p-value = 0.2701
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.36224, p-value = 0.01776
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.0737 0.9293
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.8096, p-value = 0.002082
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.30331, p-value = 0.07289
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.6899 0.1004
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92919, p-value = 0.1879
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.3294, p-value = 0.04023
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2   1.986 0.1717
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92386, p-value = 0.1513
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.37755, p-value = 0.01181
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2   2.331 0.1314
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.87051, p-value = 0.01814
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.48551, p-value = 0.0004127
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.6257 0.1053
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.94262, p-value = 0.3211
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.42932, p-value = 0.002626
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.1417 0.8691
##       15               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.9522, p-value = 0.4605
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.3325, p-value = 0.03737
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.0209 0.9794
##       15
colnames(results.bmi) <- c('Dietary Factor', names(res.out))
##Print out summary table
kable(results.bmi, format = 'html', digits=3) %>%
  kable_styling(full_width = T)
Dietary Factor Total SE Total Normal SE Normal Class 1 SE Class 1 Class 2-3 SE Class 2-3 df Effect df Error f-value p-value omega2 eta2
HEI2010_TOTAL_SCORE 52.889 3.507 56.434 4.708 59.248 8.313 45.308 4.970 2 15 1.653 0.225 0.068 0.181
HEIX1_TOTALVEG 3.195 0.318 2.943 0.566 3.791 0.480 2.986 0.581 2 15 0.649 0.536 0.000 0.080
HEIX2_GREEN_AND_BEAN 1.258 0.496 0.833 0.833 1.528 0.926 1.429 0.922 2 15 0.168 0.847 0.000 0.022
HEIX3_TOTALFRUIT 2.408 0.555 3.489 0.959 2.420 0.965 1.474 0.911 2 15 1.213 0.325 0.023 0.139
HEIX4_WHOLEFRUIT 1.746 0.564 2.737 1.034 2.000 1.225 0.714 0.714 2 15 1.228 0.321 0.025 0.141
HEIX5_WHOLEGRAIN 4.291 0.861 4.645 1.357 5.116 2.141 3.398 1.294 2 15 0.336 0.720 0.000 0.043
HEIX6_TOTALDAIRY 4.777 0.894 3.123 1.462 4.303 1.768 6.533 1.372 2 15 1.426 0.271 0.045 0.160
HEIX7_TOTPROT 4.266 0.295 3.537 0.677 4.651 0.311 4.615 0.385 2 15 1.637 0.227 0.066 0.179
HEIX8_SEAPLANT_PROT 1.832 0.553 2.953 0.944 2.000 1.225 0.752 0.709 2 15 1.527 0.249 0.055 0.169
HEIX9_FATTYACID 6.027 0.843 7.928 0.974 6.999 1.902 3.704 1.117 2 15 3.141 0.073 0.192 0.295
HEIX10_SODIUM 3.275 0.865 4.599 1.873 2.373 1.271 2.783 1.336 2 15 0.574 0.575 0.000 0.071
HEIX11_REFINEDGRAIN 5.130 0.973 4.410 1.721 7.211 1.899 4.262 1.533 2 15 0.868 0.440 0.000 0.104
HEIX12_SOFAAS 14.684 0.927 15.237 1.550 16.857 1.729 12.658 1.354 2 15 1.946 0.177 0.095 0.206
ffq_t1 %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        6
## 2 Class 1       5
## 3 Class 2-3     7

Time Point 2:RecallNum 2 - BMI Categories

## Data
## subset to only Stool (S) and second timepoint (02) 
ffq_t2 <- filter(cfrip_data, SampleSource == "S-02", RecallNo == 2)

## Initize reults matrix
results.bmi <- as.data.frame(matrix(NA, nrow=length(metrics), ncol=15))

iter<- 1
for(met in metrics){
  results.bmi[iter,1] <- met
  res.out <- ffq_anova(data=ffq_t2, v.n=met, group= group.var[1])
  results.bmi[iter,2:15] <- res.out
  
  iter <- iter + 1
}
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.90821, p-value = 0.2024
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.44786, p-value = 0.01029
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value   Pr(>F)   
## group  2  11.187 0.003627 **
##        9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.94647, p-value = 0.586
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.14627, p-value = 0.9272
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.9496 0.1979
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.91276, p-value = 0.2314
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.4202, p-value = 0.02888
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.8653   0.21
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.95727, p-value = 0.7442
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.29164, p-value = 0.2592
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value   Pr(>F)   
## group  2  8.4581 0.008571 **
##        9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.97013, p-value = 0.9121
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.26366, p-value = 0.3165
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  6.4643 0.01818 *
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.91224, p-value = 0.2279
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.31911, p-value = 0.1735
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.0448 0.1853
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.94779, p-value = 0.6049
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.37169, p-value = 0.07261
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  3.2051 0.08891 .
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.8525, p-value = 0.03942
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.26123, p-value = 0.386
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  4.8837 0.03662 *
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.79248, p-value = 0.007704
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.45221, p-value = 0.01478
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.1607 0.8539
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92649, p-value = 0.3444
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.2864, p-value = 0.23
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.9151 0.1057
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92276, p-value = 0.3096
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.49118, p-value = 0.006114
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.1576 0.8565
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.87325, p-value = 0.07187
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.5144, p-value = 0.001788
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.4908 0.6276
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.9294, p-value = 0.3738
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.39364, p-value = 0.03463
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2   0.233 0.7968
##        9
colnames(results.bmi) <- c('Dietary Factor', names(res.out))
##Print out summary table
kable(results.bmi, format = 'html', digits=3) %>%
  kable_styling(full_width = T)
Dietary Factor Total SE Total Normal SE Normal Class 1 SE Class 1 Class 2-3 SE Class 2-3 df Effect df Error f-value p-value omega2 eta2
HEI2010_TOTAL_SCORE 53.303 3.291 60.693 2.166 51.718 13.160 50.401 2.165 2 9 0.827 0.468 0.000 0.155
HEIX1_TOTALVEG 2.170 0.422 2.064 0.113 3.182 1.133 1.717 0.616 2 9 1.019 0.399 0.003 0.185
HEIX2_GREEN_AND_BEAN 1.915 0.622 1.667 1.667 3.181 1.596 1.406 0.645 2 9 0.663 0.539 0.000 0.128
HEIX3_TOTALFRUIT 3.250 0.646 5.000 0.000 1.714 1.644 3.143 0.868 2 9 1.893 0.206 0.130 0.296
HEIX4_WHOLEFRUIT 3.193 0.643 5.000 0.000 1.761 1.622 3.006 0.870 2 9 1.890 0.206 0.129 0.296
HEIX5_WHOLEGRAIN 2.032 0.672 1.006 0.528 2.738 1.721 2.191 1.084 2 9 0.395 0.684 0.000 0.081
HEIX6_TOTALDAIRY 4.756 0.906 2.656 0.885 4.333 1.586 6.018 1.476 2 9 1.233 0.336 0.037 0.215
HEIX7_TOTPROT 4.462 0.211 4.562 0.246 4.565 0.237 4.360 0.410 2 9 0.097 0.909 0.000 0.021
HEIX8_SEAPLANT_PROT 1.921 0.665 2.383 1.448 1.667 1.667 1.817 0.969 2 9 0.070 0.933 0.000 0.015
HEIX9_FATTYACID 6.230 0.991 9.467 0.533 7.179 1.411 4.137 1.362 2 9 3.924 0.060 0.328 0.466
HEIX10_SODIUM 3.481 1.078 5.091 2.586 4.086 2.093 2.373 1.534 2 9 0.533 0.604 0.000 0.106
HEIX11_REFINEDGRAIN 4.229 1.173 3.794 2.810 3.262 3.167 4.931 1.504 2 9 0.162 0.853 0.000 0.035
HEIX12_SOFAAS 15.664 1.165 18.005 1.995 14.050 2.963 15.300 1.607 2 9 0.732 0.508 0.000 0.140
ffq_t2 %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        3
## 2 Class 1       3
## 3 Class 2-3     6

Time Point 1:RecallNum 1 - GWG Categories ANOVA for women with pre-preg BMI >= 30

results.gwg <- as.data.frame(matrix(NA, nrow=length(metrics), ncol=15))

## Subset to BMI>30
ffq_t1.bmi30p <- filter(ffq_t1, PrePreg_BMI >=30)


iter<- 1
for(met in metrics){
  results.gwg[iter,1] <- met
  res.out <- ffq_anova(data=ffq_t1.bmi30p, v.n=met, group= group.var[2])
  k <- length(res.out)
  #if(k != 14) 
  results.gwg[iter,2:15] <- res.out
  iter <- iter + 1
}
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.96267, p-value = 0.8212
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.49995, p-value = 0.002684
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.3442 0.3084
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.95625, p-value = 0.7293
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.19377, p-value = 0.6902
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.6929  0.525
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.71891, p-value = 0.00129
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.47768, p-value = 0.008369
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.3819 0.6931
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.87156, p-value = 0.0684
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.32391, p-value = 0.1611
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value    Pr(>F)    
## group  2  17.479 0.0007951 ***
##        9                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.82333, p-value = 0.01747
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.39435, p-value = 0.04788
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value   Pr(>F)   
## group  2  8.5414 0.008327 **
##        9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92381, p-value = 0.319
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.47038, p-value = 0.005886
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.2553 0.7801
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93616, p-value = 0.45
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.37021, p-value = 0.07456
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.4339 0.1429
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.75538, p-value = 0.003043
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.33333, p-value = 0.1389
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  3.8413 0.06222 .
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.83175, p-value = 0.02201
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.39435, p-value = 0.04788
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value   Pr(>F)   
## group  2  8.2923 0.009082 **
##        9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.89445, p-value = 0.1345
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.36634, p-value = 0.07983
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value   Pr(>F)   
## group  2  8.8275 0.007553 **
##        9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.86848, p-value = 0.06251
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.42121, p-value = 0.0283
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value   Pr(>F)   
## group  2  13.679 0.001868 **
##        9                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.96732, p-value = 0.8808
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.39839, p-value = 0.04434
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2   0.311 0.7403
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.90817, p-value = 0.2021
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.41676, p-value = 0.03095
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.4785 0.6346
##        9
colnames(results.gwg) <- c('Dietary Factor', names(res.out))
##Print out summary table
kable(results.gwg, format = 'html', digits=3) %>%
  kable_styling(full_width = T)
Dietary Factor Total SE Total Below SE Below Within SE Within Above SE Above df Effect df Error f-value p-value omega2 eta2
HEI2010_TOTAL_SCORE 51.117 4.756 50.309 7.385 59.065 8.447 38.946 4.862 2 9 1.544 0.265 0.083 0.256
HEIX1_TOTALVEG 3.322 0.396 2.941 0.743 3.295 0.732 3.874 0.556 2 9 0.351 0.713 0.000 0.072
HEIX2_GREEN_AND_BEAN 1.470 0.633 1.250 1.250 1.528 0.926 1.667 1.667 2 9 0.028 0.973 0.000 0.006
HEIX3_TOTALFRUIT 1.868 0.652 2.500 1.443 2.349 1.005 0.225 0.113 2 9 1.079 0.380 0.013 0.193
HEIX4_WHOLEFRUIT 1.250 0.653 1.250 1.250 2.000 1.225 0.000 0.000 2 9 0.692 0.525 0.000 0.133
HEIX5_WHOLEGRAIN 4.114 1.137 3.097 1.801 5.833 1.806 2.603 2.603 2 9 0.800 0.479 0.000 0.151
HEIX6_TOTALDAIRY 5.604 1.087 6.874 2.021 4.999 2.093 4.918 1.271 2 9 0.298 0.749 0.000 0.062
HEIX7_TOTPROT 4.630 0.249 4.327 0.673 4.651 0.311 5.000 0.000 2 9 0.476 0.636 0.000 0.096
HEIX8_SEAPLANT_PROT 1.272 0.649 1.250 1.250 2.052 1.204 0.000 0.000 2 9 0.744 0.502 0.000 0.142
HEIX9_FATTYACID 5.077 1.089 5.009 1.514 6.222 2.317 3.259 0.920 2 9 0.530 0.606 0.000 0.105
HEIX10_SODIUM 2.613 0.904 3.791 2.189 2.373 1.271 1.440 1.153 2 9 0.459 0.646 0.000 0.092
HEIX11_REFINEDGRAIN 5.491 1.219 4.488 1.864 7.211 1.899 3.960 3.069 2 9 0.683 0.529 0.000 0.132
HEIX12_SOFAAS 14.408 1.194 13.532 2.180 16.552 1.809 12.001 1.924 2 9 1.351 0.307 0.055 0.231
ffq_t1.bmi30p  %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       4
## 2 Within      5
## 3 Above       3

Time Point 2:RecallNum 2 - GWG Categories ANOVA for women with pre-preg BMI >= 30

results.gwg <- as.data.frame(matrix(NA, nrow=length(metrics), ncol=15))

## Subset to BMI>30
ffq_t2.bmi30p <- filter(ffq_t2, PrePreg_BMI >=30)


iter<- 1
for(met in metrics){
  results.gwg[iter,1] <- met
  res.out <- ffq_anova(data=ffq_t2.bmi30p, v.n=met, group= group.var[2])
  results.gwg[iter,2:15] <- res.out
  
  iter <- iter + 1
}
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92738, p-value = 0.4568
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.47886, p-value = 0.02029
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.0497 0.4065
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.9069, p-value = 0.2947
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.17138, p-value = 0.9158
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.5329 0.2899
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.90959, p-value = 0.313
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.34374, p-value = 0.188
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.2643 0.7762
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.83843, p-value = 0.05548
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.39665, p-value = 0.1178
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.6949 0.2609
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.94844, p-value = 0.6729
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.34034, p-value = 0.1967
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.1787 0.8407
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.87614, p-value = 0.1429
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.32963, p-value = 0.2821
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.8031 0.2437
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.91271, p-value = 0.3353
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.36109, p-value = 0.1912
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  9.8228 0.01281 *
##        6                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.92389, p-value = 0.4254
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.23096, p-value = 0.7231
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  5.0971 0.05086 .
##        6                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.77678, p-value = 0.01106
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.24156, p-value = 0.6698
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  3.0574 0.1215
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.86718, p-value = 0.1147
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.34163, p-value = 0.1933
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.3881 0.3195
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93087, p-value = 0.4897
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.33224, p-value = 0.2736
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  5.6198 0.04216 *
##        6                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.85221, p-value = 0.07881
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.44402, p-value = 0.03908
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.4241 0.1692
##        6               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93537, p-value = 0.534
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.33256, p-value = 0.2178
## alternative hypothesis: two-sided

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  6.1487 0.03526 *
##        6                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
colnames(results.gwg) <- c('Dietary Factor', names(res.out))
##Print out summary table
kable(results.gwg, format = 'html', digits=3) %>%
  kable_styling(full_width = T)
Dietary Factor Total SE Total Below SE Below Within SE Within Above SE Above df Effect df Error f-value p-value omega2 eta2
HEI2010_TOTAL_SCORE 50.840 4.054 51.559 4.221 52.657 8.952 46.127 5.260 2 6 0.158 0.857 0.000 0.050
HEIX1_TOTALVEG 2.206 0.570 1.370 0.504 3.178 1.106 1.514 0.412 2 6 1.239 0.354 0.050 0.292
HEIX2_GREEN_AND_BEAN 1.998 0.688 1.152 1.152 2.992 1.144 1.278 1.278 2 6 0.795 0.494 0.000 0.210
HEIX3_TOTALFRUIT 2.667 0.772 3.333 1.667 3.079 1.160 0.841 0.841 2 6 0.761 0.507 0.000 0.202
HEIX4_WHOLEFRUIT 2.591 0.760 1.909 1.560 3.558 1.100 1.681 1.681 2 6 0.586 0.585 0.000 0.164
HEIX5_WHOLEGRAIN 2.373 0.863 2.783 1.723 2.678 1.563 1.150 1.150 2 6 0.233 0.799 0.000 0.072
HEIX6_TOTALDAIRY 5.457 1.094 9.122 0.878 2.893 0.619 5.087 2.236 2 6 10.429 0.011 0.677 0.777
HEIX7_TOTPROT 4.429 0.275 3.721 0.657 4.877 0.123 4.593 0.407 2 6 2.283 0.183 0.222 0.432
HEIX8_SEAPLANT_PROT 1.767 0.790 0.426 0.426 1.250 1.250 4.811 0.189 2 6 3.766 0.087 0.381 0.557
HEIX9_FATTYACID 5.151 1.093 1.675 1.625 7.110 1.086 6.446 0.755 2 6 5.297 0.047 0.488 0.638
HEIX10_SODIUM 2.944 1.195 4.747 2.478 3.065 1.798 0.000 0.000 2 6 1.076 0.399 0.017 0.264
HEIX11_REFINEDGRAIN 4.375 1.362 3.633 2.006 5.378 2.580 3.480 3.338 2 6 0.173 0.845 0.000 0.054
HEIX12_SOFAAS 14.883 1.360 17.688 1.637 12.597 2.456 15.247 0.124 2 6 1.518 0.293 0.103 0.336
ffq_t2.bmi30p  %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       3
## 2 Within      4
## 3 Above       2

Analyses with Newly Created HEI Scores

  1. HEI total (HEI2015_TOTAL_SCORE)
  2. Total Veg (HEI2015C1_TOTALVEG)
  3. Green beans (HEIX2_GREEN_AND_BEAN)
  4. Total fruit (HEI2015C3_TOTALFRUIT)
  5. Whole fruit (HEI2015C4_WHOLEFRUIT)
  6. Whole grains (HEI2015C5_WHOLEGRAIN)
  7. Dairy (HEI2015C6_TOTALDAIRY)
  8. Total protein (HEI2015C7_TOTPROT)
  9. Seafood (HEI2015C8_SEAPLANT_PROT)
  10. Fatty Acids (HEI2015C9_FATTYACID)
  11. Sodium (HEI2015C10_SODIUM)
  12. Refined Grains (HEI2015C11_REFINEDGRAIN)
  13. Saturated Fat (HEI2015C12_SFAT)
  14. Added Sugars (HEI2015C13_ADDSUG)

BMI Categories vs New HEI Scores

#metrics <- c('HEI2015_TOTAL_SCORE','HEI2015C1_TOTALVEG','HEIX2_GREEN_AND_BEAN','HEI2015C3_TOTALFRUIT','HEI2015C4_WHOLEFRUIT','HEI2015C5_WHOLEGRAIN','HEI2015C6_TOTALDAIRY','HEI2015C7_TOTPROT','HEI2015C8_SEAPLANT_PROT','HEI2015C9_FATTYACID','HEI2015C10_SODIUM','HEI2015C11_REFINEDGRAIN','HEI2015C12_SFAT','HEI2015C13_ADDSUG')


metrics <- c("heix1_totalveg",                    "heix2_greens_and_bean", "heix3_totalfruit" ,           "heix4_wholefruit",             "heix5_wholegrain",                  "heix6_totaldairy" ,                
"heix7_totprot" ,                    "heix8_seaplant_prot",              
"heix9_fattyacid",                   "heix10_sodium",                    
"heix11_refinedgrain",               "heix12_sofaas","hei2010_total_score")

group.var <- c('BMI_Class', 'GWG_Cat')
results.bmi <- as.data.frame(matrix(NA, nrow=length(metrics), ncol=15))

iter<- 1
for(met in metrics){
  results.bmi[iter,1] <- met
  res.out <- ffq_anova(data=hei_scores, v.n=met, group= group.var[1])
  results.bmi[iter,2:15] <- res.out
  
  iter <- iter + 1
}
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.84523, p-value = 0.009079
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.28844, p-value = 0.1182
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.6372 0.5434
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.932, p-value = 0.2351
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.28339, p-value = 0.1303
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.0554 0.9463
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.84727, p-value = 0.009748
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.32353, p-value = 0.05694
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value    Pr(>F)    
## group  2  13.094 0.0006227 ***
##       14                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.72382, p-value = 0.000218
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.38235, p-value = 0.01388
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value    Pr(>F)    
## group  2  52.315 3.188e-07 ***
##       14                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.97173, p-value = 0.848
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.29125, p-value = 0.09012
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2   2.242  0.143
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.91704, p-value = 0.1316
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.3468, p-value = 0.02487
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.1161  0.355
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.99115, p-value = 0.9996
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.10067, p-value = 0.9883
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.1958 0.8244
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.94068, p-value = 0.3263
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.18933, p-value = 0.5759
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.4866 0.1191
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.86644, p-value = 0.0193
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.44035, p-value = 0.001611
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.9067 0.1852
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.95844, p-value = 0.6024
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.3082, p-value = 0.07914
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.0247 0.1689
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.90911, p-value = 0.09663
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.25518, p-value = 0.2182
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.4321 0.6575
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.95726, p-value = 0.5807
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.46435, p-value = 0.0007054
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.3552 0.7072
##       14               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.9476, p-value = 0.4196
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.47059, p-value = 0.0005643
## alternative hypothesis: two-sided
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  BMI_Class 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.5392 0.5949
##       14
colnames(results.bmi) <- c('Dietary Factor', names(res.out))
##Print out summary table
kable(results.bmi, format = 'html', digits=3) %>%
  kable_styling(full_width = T)
Dietary Factor Total SE Total Normal SE Normal Class 1 SE Class 1 Class 2-3 SE Class 2-3 df Effect df Error f-value p-value omega2 eta2
heix1_totalveg 3.248 0.339 3.355 0.635 3.842 0.496 2.746 0.592 2 14 0.907 0.426 0.000 0.115
heix2_greens_and_bean 3.078 0.427 3.090 0.868 3.866 0.707 2.506 0.675 2 14 0.854 0.447 0.000 0.109
heix3_totalfruit 4.281 0.331 5.000 0.000 2.963 0.829 4.709 0.291 2 14 5.084 0.022 0.324 0.421
heix4_wholefruit 4.657 0.239 5.000 0.000 3.834 0.732 5.000 0.000 2 14 3.135 0.075 0.201 0.309
heix5_wholegrain 4.126 0.677 5.868 1.560 2.021 0.832 4.384 0.764 2 14 3.038 0.080 0.193 0.303
heix6_totaldairy 4.995 0.746 4.557 1.469 5.391 1.750 5.025 1.036 2 14 0.082 0.922 0.000 0.012
heix7_totprot 2.701 0.325 2.045 0.556 3.566 0.436 2.552 0.550 2 14 1.867 0.191 0.092 0.210
heix8_seaplant_prot 3.078 0.424 2.612 0.619 4.135 0.620 2.657 0.781 2 14 1.351 0.291 0.040 0.162
heix9_fattyacid 5.342 0.797 5.443 1.832 5.540 1.698 5.128 1.064 2 14 0.023 0.977 0.000 0.003
heix10_sodium 5.815 0.816 7.715 0.773 4.369 1.945 5.490 1.237 2 14 1.348 0.291 0.039 0.162
heix11_refinedgrain 8.473 0.387 8.158 0.879 8.845 0.679 8.432 0.594 2 14 0.213 0.811 0.000 0.030
heix12_sofaas 12.634 1.484 16.688 1.821 12.513 2.686 9.824 2.443 2 14 2.087 0.161 0.113 0.230
hei2010_total_score 62.426 2.709 69.531 5.278 60.885 3.964 58.453 4.225 2 14 1.618 0.233 0.068 0.188
hei_scores %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        6
## 2 Class 1       6
## 3 Class 2-3     7

GWG Categories ANOVA for women with pre-preg BMI >= 30

results.gwg <- as.data.frame(matrix(NA, nrow=length(metrics), ncol=15))

hei_scores.bmi30p <- filter(hei_scores, PrePreg_BMI >=30)
iter<- 1
for(met in metrics){
  results.gwg[iter,1] <- met
  res.out <- ffq_anova(data=hei_scores.bmi30p, v.n=met, group= group.var[2])
  results.gwg[iter,2:15] <- res.out
  
  iter <- iter + 1
}
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.90537, p-value = 0.186
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.16496, p-value = 0.8997
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.9767 0.1018
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.89114, p-value = 0.1219
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.23148, p-value = 0.5411
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.1382 0.1739
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.87541, p-value = 0.07657
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.25, p-value = 0.4413
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  6.9123 0.01518 *
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.79333, p-value = 0.007877
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.33333, p-value = 0.1389
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2   7.113 0.01403 *
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.97124, p-value = 0.9233
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.25718, p-value = 0.3448
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  2.3438 0.1516
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.97936, p-value = 0.981
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.36026, p-value = 0.06692
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.7556 0.4973
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.96913, p-value = 0.9015
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.12606, p-value = 0.9785
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.7241  0.511
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.85152, p-value = 0.03834
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.34169, p-value = 0.1213
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.8776 0.2082
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.89252, p-value = 0.127
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.40204, p-value = 0.02903
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.0643 0.3847
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.89051, p-value = 0.1196
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.36572, p-value = 0.06034
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.5226 0.6099
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.8687, p-value = 0.06293
## 
## 
##  K-S Test for Normality of Residuals:
## Warning in ks.test(aov.out$residuals, "pnorm", alternative = "two.sided"): ties should not be present for the Kolmogorov-Smirnov test

## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.32343, p-value = 0.1624
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  0.5756 0.5818
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.93318, p-value = 0.4151
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.41346, p-value = 0.0227
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  2  1.2299 0.3371
##        9               
## 
##  ============================= 
## 
##  Tests and Plots of Normality:

## 
##  Shapiro-Wilks Test of Normality of Residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  res
## W = 0.95425, p-value = 0.6997
## 
## 
##  K-S Test for Normality of Residuals:
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  aov.out$residuals
## D = 0.53693, p-value = 0.0009215
## alternative hypothesis: two-sided
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  ============================= 
## 
##  Tests of Homogeneity of Variance
## 
##  
##  Levenes Test:  GWG_Cat 
##  
##  
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  2  4.1666 0.05238 .
##        9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
colnames(results.gwg) <- c('Dietary Factor', names(res.out))
##Print out summary table
kable(results.gwg, format = 'html', digits=3) %>%
  kable_styling(full_width = T)
Dietary Factor Total SE Total Below SE Below Within SE Within Above SE Above df Effect df Error f-value p-value omega2 eta2
heix1_totalveg 3.203 0.419 2.506 0.841 4.320 0.425 2.270 0.204 2 9 3.937 0.059 0.329 0.467
heix2_greens_and_bean 3.073 0.511 2.376 0.946 4.518 0.482 1.594 0.176 2 9 5.477 0.028 0.427 0.549
heix3_totalfruit 3.982 0.445 5.000 0.000 3.398 0.635 3.596 1.404 2 9 1.425 0.290 0.066 0.240
heix4_wholefruit 4.514 0.333 5.000 0.000 4.519 0.481 3.859 1.141 2 9 0.807 0.476 0.000 0.152
heix5_wholegrain 3.400 0.644 4.297 0.582 2.266 0.754 4.093 2.166 2 9 1.145 0.360 0.024 0.203
heix6_totaldairy 5.177 0.899 4.799 1.358 4.473 1.337 6.856 2.492 2 9 0.544 0.598 0.000 0.108
heix7_totprot 2.974 0.384 2.795 0.975 3.428 0.467 2.458 0.543 2 9 0.503 0.621 0.000 0.100
heix8_seaplant_prot 3.273 0.548 2.945 1.213 3.305 0.902 3.656 0.904 2 9 0.101 0.905 0.000 0.022
heix9_fattyacid 5.300 0.895 4.947 1.818 5.694 1.642 5.113 1.341 2 9 0.059 0.943 0.000 0.013
heix10_sodium 5.023 1.043 6.667 1.628 4.435 1.909 3.812 1.915 2 9 0.602 0.568 0.000 0.118
heix11_refinedgrain 8.604 0.431 8.610 1.058 8.679 0.636 8.471 0.695 2 9 0.015 0.985 0.000 0.003
heix12_sofaas 10.944 1.775 8.132 4.287 11.766 2.523 13.324 1.645 2 9 0.643 0.548 0.000 0.125
hei2010_total_score 59.466 2.862 58.075 7.512 60.800 4.000 59.099 3.337 2 9 0.072 0.931 0.000 0.016
hei_scores.bmi30p %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       4
## 2 Within      5
## 3 Above       4

Mapping data for Microbiome Analyses

First I need to wrestle with the microbiome data to connect those data with the diet data… here we go.

# OTU Data
otu_table_dat <- read.table("otu_table_rarefied.txt", sep = "\t", header = T)
rownames(otu_table_dat) <- otu_table_dat$X
otu_table_mydata <- as.matrix(otu_table_dat[,2:111])
OTU <- otu_table(otu_table_mydata, taxa_are_rows = T, errorIfNULL = F)

# Taxonomy Data
taxa_table_dat <- read.table("taxa_table.txt", sep = "\t", header = T)
rownames(taxa_table_dat) <- taxa_table_dat$X
taxa_table_dat <- as.matrix(taxa_table_dat[,2:7])
TAX <- tax_table(taxa_table_dat)

# # Make Phylogenic Tree
# taxa_table_dat <- read.table("taxa_table.txt", sep = "\t", header = T)
# taxa_table_dat$Kingdom <- as.factor(taxa_table_dat$Kingdom)
# taxa_table_dat$Phylum <- as.factor(taxa_table_dat$Phylum)
# taxa_table_dat$Class <- as.factor(taxa_table_dat$Class)
# taxa_table_dat$Order <- as.factor(taxa_table_dat$Order)
# taxa_table_dat$Family <- as.factor(taxa_table_dat$Family)
# taxa_table_dat$Genus <- as.factor(taxa_table_dat$Genus)
# 
# dat_tree <- as.phylo(~Phylum/Class/Order/Family/Genus, data = taxa_table_dat, use.labels = T)
# dat_tree$tip.label <- taxa_names(TAX)


## Use the above data as the sampledata
samp.names <- colnames(otu_table_mydata)
subdat <- cfrip_data[ cfrip_data$SampleID %in% samp.names,]
subdat <- as.data.frame(subdat)
rownames(subdat) <- subdat$SampleID
samdat <- sample_data(subdat)


# Combine the data
preg_microbiome_dat <- phyloseq(OTU, TAX, samdat)
preg_microbiome_dat
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 363 taxa and 102 samples ]
## sample_data() Sample Data:       [ 102 samples by 471 sample variables ]
## tax_table()   Taxonomy Table:    [ 363 taxa by 6 taxonomic ranks ]
# Now prepare data for Jun's Code:
# Start to get the file in the Jun Chen format
otus <- psmelt(preg_microbiome_dat)
otus <- otus[ otus$Sample %in% subdat$SampleID, ]
k <- ncol(otus)
otus <- otus[,c(1:3,(k-5):k)]
otus2 <- reshape(otus, idvar = "OTU", timevar = "Sample", direction = "wide")
OTU <- otus2$OTU
Kingdom <- otus2$Kingdom.Faucher.0008459
Phylum <- otus2$Phylum.Faucher.0008459
Class <- otus2$Class.Faucher.0008459
Order <- otus2$Order.Faucher.0008459
Family <- otus2$Family.Faucher.0008459
Genus <- otus2$Genus.Faucher.0008459
# Next get just the observed counts of the OTUs
observed_counts <- otus2[, grep("Abundance",colnames(otus2)) ]
i <- 1
for(i in 1:length(colnames(observed_counts))){
  colnames(observed_counts)[i] <- substring(colnames(observed_counts)[i], 11)
}

# OTU Names object
otu.name <- cbind(Kingdom,Phylum,Class,Order,Family,Genus)
rownames(otu.name) <- OTU

# Abundance List Objects
abund.list <- list( cbind(Kingdom,observed_counts),
                    cbind(Phylum,observed_counts),
                    cbind(Class,observed_counts),
                    cbind(Order,observed_counts),
                    cbind(Family,observed_counts),
                    cbind(Genus,observed_counts),
                    cbind(OTU,observed_counts))
names(abund.list) <- c("Kingdom","Phylum","Class","Order","Family","Genus","OTU")
i <- 1
for(i in 1:7){
  abund.list[[i]] <- abundance_list_create(abund.list[[i]],abund.list[[i]][,1])
}
abund.list_saved <- abund.list
# Full dataset ready for Jun's Code:
preg_mircobiome_data <- list(otu.tab <- abund.list$OTU,
                              otu.name, abund.list,
                              meta.dat <- subdat)
names(preg_mircobiome_data) <- c("otu.tab", "otu.name","abund.list",
                                  "meta.dat")

### Now that we know which genera to use, 
# we can reshape the raw abundance data to be 
# merged with the meta-data for creating the heatmaps.
# So, the following reshapes the microbiome data.
## Now, merge the genera data with the meta data.
# first, make the first letters genera:(whatever thegenera is).
rownames(abund.list$Genus) <- paste0('Genus_',rownames(abund.list$Genus))
g.dat <- as.data.frame(t(abund.list$Genus))
g.dat$merged_IDs <- rownames(g.dat)
g.dat <- g.dat %>% arrange(desc(merged_IDs))
subdat <- subdat %>% arrange(desc(SampleID))

## Merged correctly!!!!!
cfrip_data_biom <- cbind(subdat, g.dat)
# therefore, this is the data that I can use for the remaining analyses.

Now, I need to identify the genera with greater than 2% relative abundance across all cases by timepoint and sample source.

## Now, identify the genera with abundance greater than 2%
genera_dat <- as.data.frame(matrix(ncol=3))
colnames(genera_dat) <- c('SampleSource', 'Genus', 'RelAbund')

## Set up loops
ITER <- c('S-01', 'S-02')

for(iter in ITER){
    # Subset to necessary Samples
    subdat.mi <- subset_samples(preg_microbiome_dat, SampleSource==iter)
    meta.sub <- subdat[subdat$SampleSource==iter, ]

    # Now prepare data for Jun's Code:
    # Start to get the file in the Jun Chen format
    otus.s <- psmelt(subdat.mi)
    otus.s <- otus.s[ otus.s$Sample %in% meta.sub$SampleID, ]
    k <- ncol(otus.s)
    otus.s <- otus.s[,c(1:3,(k-5):k)]
    otus2 <- reshape(otus.s, idvar = "OTU", timevar = "Sample", direction = "wide")
    OTU <- otus2$OTU
    Kingdom <- otus2[,3]
    Phylum <- otus2[,4]
    Class <- otus2[,5]
    Order <- otus2[,6]
    Family <- otus2[,7]
    Genus <- otus2[,8]
    
    # Next get just the observed counts of the OTUs
    observed_counts <- otus2[, grep("Abundance",colnames(otus2)) ]
    i <- 1
    for(i in 1:length(colnames(observed_counts))){
      colnames(observed_counts)[i] <- substring(colnames(observed_counts)[i], 11)
    }
    
    # OTU Names object
    otu.name <- cbind(Kingdom,Phylum,Class,Order,Family,Genus)
    rownames(otu.name) <- OTU
    
    # Abundance List Objects
    abund.list <- list( cbind(Kingdom,observed_counts),
                        cbind(Phylum,observed_counts),
                        cbind(Class,observed_counts),
                        cbind(Order,observed_counts),
                        cbind(Family,observed_counts),
                        cbind(Genus,observed_counts),
                        cbind(OTU,observed_counts))
    names(abund.list) <- c("Kingdom","Phylum","Class","Order","Family","Genus","OTU")
    i <- 1
    for(i in 1:7){
      abund.list[[i]] <- abundance_list_create(abund.list[[i]],abund.list[[i]][,1])
    }
    
    meta.sub  <- meta.sub[ meta.sub$SampleID %in% colnames(abund.list$Genus), ]
    
    # Calculate Genera Prevalances
    genus.abund <- as.data.frame(abund.list$Genus)
    # Total number of observed OTUs per person
    tot.count <- sum(apply(genus.abund, 1, sum))
    # Finding total abundance of each taxa
    genus.abund$Abundance <- apply(genus.abund, 1, sum)
    # Ordering by abundance
    # genus.abund <- genus.abund[order(genus.abund$Abundance,decreasing = T),]
    # Subset? to top 10 Genera
    genus.abund <- genus.abund[,ncol(genus.abund), drop = F]
    # Rescales the adbundances to 0-1
    genus.abund <- genus.abund / tot.count
    #genus.abund <- genus.abund[ order(genus.abund[,1], decreasing = T), , drop = F]
    
    # Subset to Genera with > .02 i.e. 2% abundance
    genus.abund <- genus.abund[genus.abund$Abundance > .02 , , drop = F]
    
      # Combine data from this condition
      a <- nrow(genus.abund)
      A <- cbind(rep(iter,a))
      A <- cbind(A, rownames(genus.abund), genus.abund[,1])
      colnames(A) <- c('SampleSource', 'Genus', 'RelAbund')
      
      # Combine this iteration with all previous
      genera_dat <- rbind(genera_dat, A)
} # End loops
genera_dat <- as.data.frame(genera_dat[-1,])
genera_dat$RelAbund <- as.numeric(genera_dat$RelAbund)

kable(genera_dat, format='html', digits=4,row.names = F) %>%
  kable_styling(full_width = T)
SampleSource Genus RelAbund
S-01 Alistipes 0.0257
S-01 Bacteroides 0.1485
S-01 Campylobacter 0.0225
S-01 Dialister 0.0556
S-01 Ezakiella 0.0401
S-01 Faecalibacterium 0.0243
S-01 Finegoldia 0.0327
S-01 Fusobacterium 0.0242
S-01 Lachnospiraceae 0.0366
S-01 Lactobacillus 0.0274
S-01 Peptoniphilus 0.0265
S-01 Porphyromonas 0.0622
S-01 Prevotella 0.1363
S-01 Prevotellaceae 0.1487
S-01 Ruminococcaceae 0.0442
S-02 Akkermansia 0.0204
S-02 Anaerococcus 0.0223
S-02 Bacteroides 0.1418
S-02 Dialister 0.0347
S-02 Faecalibacterium 0.0274
S-02 Finegoldia 0.0303
S-02 Lachnospiraceae 0.0590
S-02 Lactobacillus 0.1180
S-02 Peptoniphilus 0.0294
S-02 Porphyromonas 0.0451
S-02 Prevotella 0.1018
S-02 Prevotellaceae 0.1193
S-02 Ruminococcaceae 0.0231

First, by Food Frequency Questionnaires (FFQ) (Slide 16, Step 1A: FFQ/DHQII)

  1. Create matrix of gut microbiome and dietary variables from FFQ (DHQII)

    1. From “GRAMWT_G_USDA” to “Vegetablesforadjust”
  2. Remove all taxa (genus) that have <2% relative abundance in any sample of the gut microbiome

  3. For each taxon identify the median by sample time point

  4. For each measure of alpha diversity identify the median by time point

  5. For each dietary (all listed) variable

    1. Conduct 2-tailed t-test on dietary variables above vs below the median of each taxon
    2. Conduct spearman correlation between dietary variables and taxon
    3. Conduct 2-tailed t-test on dietary variables above vs below the median of each measure of alpha diversity
    4. Conduct spearman correlation between dietary variables and each measure of alpha diversity
  6. List all t-test results that have p<0.05 AND correlation R2 >0.5

  7. Make heat map of the spearman correlation results meeting the above criteria colored by strength of correlation and *’d if p<0.05 (see example)

Time point 1 analyses WITHOUT HEI SCORES

alpha.measures <- c('Observed_OTUs', 'Shannon', 'Simpson', 'InvSimpson', 'Fisher')
taxa <- filter(genera_dat, SampleSource=='S-01')
data <- filter(cfrip_data_biom, SampleSource=='S-01')

#  SAMPLE SIZE OF DATA
data %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       4
## 2 Within      5
## 3 Above      10
data %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        6
## 2 Class 1       6
## 3 Class 2-3     7
## Measures to cut by median:
measures <- c(alpha.measures, paste0('Genus_',taxa$Genus))

## Which metrics for these analyses:
which(colnames(cfrip_data) == "GRAMWT_G_USDA")
## [1] 237
which(colnames(cfrip_data) == "VegetablesforadjustFlag")
## [1] 452
metrics <- colnames(cfrip_data)[237:452] ## Diet varibles only
## Run Taxa analyses
i<-j<-1
md <- as.data.frame(matrix(nrow=length(measures)*length(metrics), ncol=18))
for(met in metrics){
  for(i in 1:length(measures)){
    md[j,1] <- 'S-01'
    md[j,2] <- met
    md[j,3] <- m <- measures[i]
    md[j,4] <- median(data[, m])  
    ## Create dummy code for above belowmedian of taxa
    data[,paste0('median_code_',m)] <- ifelse(data[, m] > md[j,4], 1, 0)
    ## Set up formula
    form <- as.formula(paste0(met,' ~ ', 'median_code_',m))
    ## T-test
    t.out <- t.test(form, data=data)
    ## Compute cohen's d
    c.d <- cohen.d(form, data=data)
    ## Correlation (spearman)
    sp.cor <- cor(na.omit(data[, c(m,met)]),
                  method='spearman')
    sp.cor <- sp.cor[1,2]
    ## SAve results
    md[j,5:11] <- as.numeric(unlist(t.out)[1:7])
    md[j,12:14] <- as.numeric(unlist(c.d)[3:5])
    md[j,15] <- sp.cor
    md[j,16] <- sp.cor**2
    # get sample sizes used for above vs. below
    temp.dat <- na.omit(data[, c(m,met, paste0('median_code_',m))])
    n.below <- nrow(temp.dat[ temp.dat[,3] == 0, ])
    n.above <- nrow(temp.dat[ temp.dat[,3] == 1, ])
    md[j,17] <- n.below
    md[j,18] <- n.above
    ## update iterator
    j <- j + 1
  } ## Loop around median measures
} ## end loop around metrics of dietary factos
# # naming the columns
colnames(md) <- c('SampleSource', 'Diet_Factor', 'Alpha_Taxa', 'Median', 't', 'df', 'pvalue', 't-LL', 't-UL', 'Mean Below Median', 'Mean Above Median', 'Cohen d', 'd LL', 'd UL', 'rs', 'r2', 'n.below', 'n.above')
    
# kable(md, format='html',digits=3,row.names = F)%>%
#   kable_styling(full_width = T)

## Now for the subset of those with 
md.s <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-01')
kable(md.s, format='html', digits=3, row.names = F) %>%
  kable_styling(full_width = T)
SampleSource Diet_Factor Alpha_Taxa Median t df pvalue t-LL t-UL Mean Below Median Mean Above Median Cohen d d LL d UL rs r2 n.below n.above
S-01 ENERGY_KCAL_USDA Genus_Bacteroides 156 -2.988 10.378 0.013 -1064.506 -157.651 839.386 1450.464 -1.559 391.985 -2.754 0.733 0.537 10 7
S-01 TOTAL_FAT_G_USDA Genus_Finegoldia 21 2.285 13.083 0.040 1.329 46.710 53.456 29.436 1.077 22.309 -0.031 -0.717 0.514 9 8
S-01 PROTEIN_G_USDA Genus_Bacteroides 156 -2.802 6.877 0.027 -57.237 -4.742 26.712 57.701 -1.613 19.207 -2.818 0.735 0.541 10 7
S-01 TOTAL_POLYUNSATURATED_FATTY_ACID Genus_Finegoldia 21 2.826 13.631 0.014 1.419 10.442 11.669 5.739 1.337 4.436 0.192 -0.767 0.589 9 8
S-01 TOTAL_POLYUNSATURATED_FATTY_ACID Genus_Peptoniphilus 24 4.409 9.827 0.001 3.860 11.786 13.020 5.197 2.228 3.511 0.910 -0.746 0.557 8 9
S-01 VITAMIN_E_AS_ALPHA_TOCOPHEROL_MG Genus_Peptoniphilus 24 5.577 12.481 0.000 2.442 5.551 7.069 3.072 2.767 1.444 1.319 -0.725 0.526 8 9
S-01 THIAMIN_VITAMIN_B1_MG_USDA Genus_Bacteroides 156 -3.287 9.514 0.009 -0.736 -0.139 0.617 1.054 -1.749 0.250 -2.979 0.800 0.639 10 7
S-01 THIAMIN_VITAMIN_B1_MG_USDA Genus_Lachnospiraceae 36 -3.163 9.610 0.011 -0.730 -0.125 0.621 1.049 -1.680 0.255 -2.897 0.753 0.566 10 7
S-01 NIACIN_MG_USDA Genus_Bacteroides 156 -4.053 7.348 0.004 -13.336 -3.568 7.958 16.410 -2.295 3.683 -3.639 0.831 0.690 10 7
S-01 VITAMIN_B6_MG_USDA Genus_Bacteroides 156 -3.564 7.477 0.008 -1.333 -0.278 0.842 1.647 -2.009 0.401 -3.290 0.846 0.715 10 7
S-01 PHOSPHORUS_MG_USDA Genus_Bacteroides 156 -2.749 6.890 0.029 -1044.436 -76.815 454.763 1015.389 -1.582 354.365 -2.781 0.752 0.566 10 7
S-01 IRON_MG_USDA Genus_Bacteroides 156 -2.684 9.103 0.025 -7.552 -0.651 7.003 11.104 -1.443 2.841 -2.619 0.784 0.615 10 7
S-01 IRON_MG_USDA Genus_Lachnospiraceae 36 -2.485 9.286 0.034 -7.410 -0.365 7.091 10.979 -1.330 2.923 -2.488 0.724 0.524 10 7
S-01 ZINC_MG_USDA Genus_Bacteroides 156 -2.724 8.111 0.026 -7.152 -0.602 4.516 8.393 -1.505 2.576 -2.691 0.757 0.574 10 7
S-01 SELENIUM_MCG_USDA Genus_Bacteroides 156 -2.970 7.129 0.020 -74.173 -8.555 34.986 76.350 -1.694 24.411 -2.914 0.755 0.570 10 7
S-01 SODIUM_MG_USDA Genus_Bacteroides 156 -2.635 6.813 0.035 -2225.955 -114.099 1121.460 2291.487 -1.521 769.332 -2.709 0.750 0.563 10 7
S-01 PFA_18_2_OCTADECADIENOIC_ACID_G_ Genus_Finegoldia 21 2.864 12.808 0.013 1.320 9.475 10.331 4.934 1.347 4.007 0.200 -0.771 0.594 9 8
S-01 PFA_18_2_OCTADECADIENOIC_ACID_G_ Genus_Peptoniphilus 24 4.148 9.369 0.002 3.165 10.658 11.450 4.539 2.103 3.286 0.813 -0.737 0.544 8 9
S-01 TOTAL_PROTEIN_G_NDSR Genus_Bacteroides 156 -2.634 6.689 0.035 -63.633 -3.126 26.389 59.769 -1.528 21.852 -2.717 0.725 0.526 10 7
S-01 VEGETABLE_PROTEIN_G_NDSR Genus_Bacteroides 156 -4.266 10.060 0.002 -14.976 -4.705 9.961 19.801 -2.242 4.390 -3.574 0.723 0.523 10 7
S-01 GAMMA_TOCOPHEROL_MG_NDSR Genus_Finegoldia 21 3.157 13.503 0.007 2.163 11.425 11.589 4.795 1.492 4.554 0.321 -0.737 0.543 9 8
S-01 TRYPTOPHAN_G_NDSR Genus_Bacteroides 156 -2.674 6.692 0.033 -0.738 -0.042 0.307 0.697 -1.550 0.252 -2.744 0.741 0.549 10 7
S-01 THREONINE_G_NDSR Genus_Bacteroides 156 -2.609 6.651 0.037 -2.419 -0.106 0.963 2.226 -1.516 0.833 -2.703 0.755 0.570 10 7
S-01 PHENYLALANINE_G_NDSR Genus_Bacteroides 156 -2.480 6.665 0.044 -2.832 -0.053 1.166 2.609 -1.440 1.002 -2.615 0.725 0.526 10 7
S-01 VALINE_G_NDSR Genus_Bacteroides 156 -2.432 6.604 0.047 -3.392 -0.027 1.312 3.021 -1.416 1.208 -2.587 0.752 0.566 10 7
S-01 ARGININE_G_NDSR Genus_Bacteroides 156 -2.600 6.911 0.036 -3.561 -0.164 1.490 3.353 -1.495 1.246 -2.679 0.712 0.508 10 7
S-01 HISTIDINE_G_NDSR Genus_Bacteroides 156 -2.441 6.570 0.047 -1.816 -0.017 0.691 1.607 -1.423 0.644 -2.595 0.734 0.538 10 7
S-01 ALANINE_G_NDSR Genus_Bacteroides 156 -2.549 6.653 0.040 -3.050 -0.098 1.230 2.804 -1.481 1.063 -2.662 0.720 0.518 10 7
S-01 GLUTAMIC_ACID_G_NDSR Genus_Bacteroides 156 -2.540 6.653 0.040 -11.724 -0.358 4.946 10.987 -1.476 4.094 -2.656 0.745 0.555 10 7
S-01 SERINE_G_NDSR Genus_Bacteroides 156 -2.405 6.627 0.049 -3.059 -0.008 1.175 2.709 -1.398 1.097 -2.567 0.752 0.566 10 7
S-01 ASH_G_NDSR Genus_Bacteroides 156 -3.389 9.144 0.008 -11.514 -2.309 7.516 14.427 -1.820 3.797 -3.064 0.712 0.508 10 7
S-01 TRANS_18_1_TRANS_OCTADECENOIC_AC Genus_Finegoldia 21 2.903 12.384 0.013 0.400 2.775 2.968 1.380 1.361 1.166 0.212 -0.721 0.519 9 8
S-01 NIACIN_EQUIVALENTS_MG_NDSR Genus_Bacteroides 156 -3.051 6.702 0.020 -25.394 -3.104 12.821 27.070 -1.769 8.057 -3.002 0.750 0.563 10 7
S-01 OMEGA_3_FATTY_ACIDS_G_NDSR Genus_Peptoniphilus 24 4.856 14.976 0.000 0.405 1.040 1.294 0.571 2.347 0.308 1.002 -0.748 0.560 8 9
S-01 XYLITOL_G_NDSR Genus_Peptoniphilus 24 3.607 9.671 0.005 0.006 0.025 0.021 0.006 1.825 0.009 0.593 -0.737 0.543 8 9
S-01 NITROGEN_G_NDSR Genus_Bacteroides 156 -2.660 6.700 0.034 -10.306 -0.559 4.275 9.707 -1.542 3.523 -2.734 0.735 0.541 10 7
S-01 EnergyfromTOTAL_FAT_G_USDA Genus_Finegoldia 21 4.879 14.258 0.000 8.439 21.640 41.088 26.049 2.386 6.303 1.032 -0.744 0.554 9 8
S-01 EnergyfromCARBOHYDRATE_G_USD Genus_Finegoldia 21 -4.521 14.995 0.000 -28.462 -10.222 45.347 64.689 -2.182 8.865 -3.489 0.727 0.528 9 8
S-01 EnergyfromTOTAL_POLYUNSATURA Genus_Finegoldia 21 4.337 14.120 0.001 2.037 6.017 9.104 5.077 2.060 1.955 0.779 -0.775 0.601 9 8
## Full Heatmap
dat <- filter(md, SampleSource == 'S-01')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('FFQ_heatmap_S-01.png', plot=p, width=9, heigh=30, units="in")

# Subset heatmap
dat <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-01')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('FFQ_heatmap_S-01_subset.png', plot=p, width=9, heigh=15, units="in")

Time point 2 analyses WITHOUT HEI SCORES

alpha.measures <- c('Observed_OTUs', 'Shannon', 'Simpson', 'InvSimpson', 'Fisher')
taxa <- filter(genera_dat, SampleSource=='S-02')
data <- filter(cfrip_data_biom, SampleSource=='S-02')

#  SAMPLE SIZE OF DATA
data %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       3
## 2 Within      5
## 3 Above       5
data %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        3
## 2 Class 1       4
## 3 Class 2-3     6
## Measures to cut by median:
#measures <- c(alpha.measures, paste0('Genus_',taxa$Genus))

## Run Taxa analyses
i<-1
for(met in metrics){
  for(i in 1:length(measures)){
    md[j,1] <- 'S-02'
    md[j,2] <- met
    md[j,3] <- m <- measures[i]
    md[j,4] <- median(data[, m])  
    ## Create dummy code for above belowmedian of taxa
    data[,paste0('median_code_',m)] <- ifelse(data[, m] > md[j,4], 1, 0)
    ## Set up formula
    form <- as.formula(paste0(met,' ~ ', 'median_code_',m))
    ## T-test
    t.out <- t.test(form, data=data)
    ## Compute cohen's d
    c.d <- cohen.d(form, data=data)
    ## Correlation (spearman)
    sp.cor <- cor(na.omit(data[, c(m,met)]),
                  method='spearman')
    sp.cor <- sp.cor[1,2]
    ## SAve results
    md[j,5:11] <- as.numeric(unlist(t.out)[1:7])
    md[j,12:14] <- as.numeric(unlist(c.d)[3:5])
    md[j,15] <- sp.cor
    md[j,16] <- sp.cor**2
    # get sample sizes used for above vs. below
    temp.dat <- na.omit(data[, c(m,met, paste0('median_code_',m))])
    n.below <- nrow(temp.dat[ temp.dat[,3] == 0, ])
    n.above <- nrow(temp.dat[ temp.dat[,3] == 1, ])
    md[j,17] <- n.below
    md[j,18] <- n.above
    ## update iterator
    j <- j + 1
  } ## Loop around median measures
} ## end loop around metrics of dietary factos
    
# kable(md, format='html',digits=3,row.names = F)%>%
#   kable_styling(full_width = T)

## Now for the subset of those with 
md.s <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-02')
kable(md.s, format='html',digits=3,row.names = F)%>%
  kable_styling(full_width = T)
SampleSource Diet_Factor Alpha_Taxa Median t df pvalue t-LL t-UL Mean Below Median Mean Above Median Cohen d d LL d UL rs r2 n.below n.above
S-02 MFA_22_1_DOCOSENOIC_ACID_G_USDA Genus_Alistipes 14 -2.597 5.143 0.047 -0.088 -0.001 0.006 0.050 -1.568 0.028 -2.967 0.815 0.665 7 6
S-02 MFA_22_1_DOCOSENOIC_ACID_G_USDA Genus_Bacteroides 236 -2.597 5.143 0.047 -0.088 -0.001 0.006 0.050 -1.568 0.028 -2.967 0.861 0.741 7 6
S-02 PFA_22_5_DOCOSAPENTAENOIC_ACID_G Genus_Faecalibacterium 37 -2.617 6.385 0.038 -0.027 -0.001 0.003 0.017 -1.547 0.009 -2.942 0.709 0.503 7 6
S-02 INOSITOL_G_NDSR Genus_Prevotellaceae 159 2.863 6.146 0.028 0.033 0.408 0.256 0.035 1.468 0.150 0.089 -0.727 0.529 7 6
## Heatmap
dat <- filter(md, SampleSource == 'S-02')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')

p

#ggsave('FFQ_heatmap_S-02.png', plot=p, width=9, heigh=30, units="in")

# Subset heatmap
dat <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-02')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('FFQ_heatmap_S-02_subset.png', plot=p, width=9, heigh=15, units="in")

Rerun Time point 1 WITH HEI SCORES

alpha.measures <- c('Observed_OTUs', 'Shannon', 'Simpson', 'InvSimpson', 'Fisher')
taxa <- filter(genera_dat, SampleSource=='S-01')
data <- filter(cfrip_data_biom, SampleSource=='S-01')

#  SAMPLE SIZE OF DATA
data %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       4
## 2 Within      5
## 3 Above      10
data %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        6
## 2 Class 1       6
## 3 Class 2-3     7
## Measures to cut by median:
measures <- c(alpha.measures, paste0('Genus_',taxa$Genus))

## Which metrics for these analyses:
metrics <- colnames(cfrip_data)[457:470] ## HEI Variables


## Run Taxa analyses
i<-j<-1
md <- as.data.frame(matrix(nrow=length(measures)*length(metrics), ncol=18))
for(met in metrics){
  for(i in 1:length(measures)){
    md[j,1] <- 'S-01'
    md[j,2] <- met
    md[j,3] <- m <- measures[i]
    md[j,4] <- median(data[, m])  
    ## Create dummy code for above belowmedian of taxa
    data[,paste0('median_code_',m)] <- ifelse(data[, m] > md[j,4], 1, 0)
    ## Set up formula
    form <- as.formula(paste0(met,' ~ ', 'median_code_',m))
    ## T-test
    t.out <- t.test(form, data=data)
    ## Compute cohen's d
    c.d <- cohen.d(form, data=data)
    ## Correlation (spearman)
    sp.cor <- cor(na.omit(data[, c(m,met)]),
                  method='spearman')
    sp.cor <- sp.cor[1,2]
    ## SAve results
    md[j,5:11] <- as.numeric(unlist(t.out)[1:7])
    md[j,12:14] <- as.numeric(unlist(c.d)[3:5])
    md[j,15] <- sp.cor
    md[j,16] <- sp.cor**2
    # get sample sizes used for above vs. below
    temp.dat <- na.omit(data[, c(m,met, paste0('median_code_',m))])
    n.below <- nrow(temp.dat[ temp.dat[,3] == 0, ])
    n.above <- nrow(temp.dat[ temp.dat[,3] == 1, ])
    md[j,17] <- n.below
    md[j,18] <- n.above
    ## update iterator
    j <- j + 1
  } ## Loop around median measures
} ## end loop around metrics of dietary factos
# # naming the columns
colnames(md) <- c('SampleSource', 'Diet_Factor', 'Alpha_Taxa', 'Median', 't', 'df', 'pvalue', 't-LL', 't-UL', 'Mean Below Median', 'Mean Above Median', 'Cohen d', 'd LL', 'd UL', 'rs', 'r2', 'n.below', 'n.above')
    
# kable(md, format='html',digits=3,row.names = F)%>%
#   kable_styling(full_width = T)

## Now for the subset of those with 
md.s <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-01')
kable(md.s, format='html',digits=3,row.names = F)%>%
  kable_styling(full_width = T)
SampleSource Diet_Factor Alpha_Taxa Median t df pvalue t-LL t-UL Mean Below Median Mean Above Median Cohen d d LL d UL rs r2 n.below n.above
S-01 kcal_HEI Genus_Peptoniphilus 24 2.524 9.272 0.032 87.829 1543.006 2475.668 1660.251 1.19 685.343 0.106 -0.727 0.529 9 9
## Full Heatmap
dat <- filter(md, SampleSource == 'S-01')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('FFQ_HEI_heatmap_S-01.png', plot=p, width=9, heigh=6, units="in")

# Subset heatmap
dat <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-01')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('FFQ_HEI_heatmap_S-01_subset.png', plot=p, width=9, heigh=15, units="in")

Rerun Time point 2 WITH HEI SCORES

alpha.measures <- c('Observed_OTUs', 'Shannon', 'Simpson', 'InvSimpson', 'Fisher')
taxa <- filter(genera_dat, SampleSource=='S-02')
data <- filter(cfrip_data_biom, SampleSource=='S-02')

#  SAMPLE SIZE OF DATA
data %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       3
## 2 Within      5
## 3 Above       5
data %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        3
## 2 Class 1       4
## 3 Class 2-3     6
## Measures to cut by median:
#measures <- c(alpha.measures, paste0('Genus_',taxa$Genus))

## Run Taxa analyses
i<-1
for(met in metrics){
  for(i in 1:length(measures)){
    md[j,1] <- 'S-02'
    md[j,2] <- met
    md[j,3] <- m <- measures[i]
    md[j,4] <- median(data[, m])  
    ## Create dummy code for above belowmedian of taxa
    data[,paste0('median_code_',m)] <- ifelse(data[, m] > md[j,4], 1, 0)
    ## Set up formula
    form <- as.formula(paste0(met,' ~ ', 'median_code_',m))
    ## T-test
    t.out <- t.test(form, data=data)
    ## Compute cohen's d
    c.d <- cohen.d(form, data=data)
    ## Correlation (spearman)
    sp.cor <- cor(na.omit(data[, c(m,met)]),
                  method='spearman')
    sp.cor <- sp.cor[1,2]
    ## SAve results
    md[j,5:11] <- as.numeric(unlist(t.out)[1:7])
    md[j,12:14] <- as.numeric(unlist(c.d)[3:5])
    md[j,15] <- sp.cor
    md[j,16] <- sp.cor**2
    # get sample sizes used for above vs. below
    temp.dat <- na.omit(data[, c(m,met, paste0('median_code_',m))])
    n.below <- nrow(temp.dat[ temp.dat[,3] == 0, ])
    n.above <- nrow(temp.dat[ temp.dat[,3] == 1, ])
    md[j,17] <- n.below
    md[j,18] <- n.above
    ## update iterator
    j <- j + 1
  } ## Loop around median measures
} ## end loop around metrics of dietary factos
    
# kable(md, format='html',digits=3,row.names = F)%>%
#   kable_styling(full_width = T)

## Now for the subset of those with 
md.s <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-02')
kable(md.s, format='html',digits=3,row.names = F)%>%
  kable_styling(full_width = T)
SampleSource Diet_Factor Alpha_Taxa Median t df pvalue t-LL t-UL Mean Below Median Mean Above Median Cohen d d LL d UL rs r2 n.below n.above
## Heatmap
dat <- filter(md, SampleSource == 'S-02')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')

p

#ggsave('FFQ_HEI_heatmap_S-02.png', plot=p, width=9, heigh=6, units="in")

# Subset heatmap
dat <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-02')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('FFQ_HEI_heatmap_S-02_subset.png', plot=p, width=9, heigh=15, units="in")

Second, by ASA24 survey of food (Slide 17, Step 1B: ASA24)

  1. Create matrix of gut microbiome and dietary variables matched by time point (i.e. gut sample 1 AND ASA24 RecallNo 1)

  2. Remove all taxa (genus) that have <2% relative abundance in any sample of the gut microbiome

  3. For each taxon identify the median by sample time point

  4. For each measure of alpha diversity identify the median by time point

  5. For each dietary (all listed) variable

    1. Conduct 2-tailed t-test on dietary variables above vs below the median of each taxon
    2. Conduct spearman correlation between dietary variables and taxon
    3. Conduct 2-tailed t-test on dietary variables above vs below the median of each measure of alpha diversity
    4. Conduct spearman correlation between dietary variables and each measure of alpha diversity
  6. List all t-test results that have p<0.1 AND correlation R2 >0.5

  7. Make heat map of the spearman correlation results meeting the above criteria colored by strength of correlation and satrred’d if p<0.05 (see example)

Time point 1 analyses

alpha.measures <- c('Observed_OTUs', 'Shannon', 'Simpson', 'InvSimpson', 'Fisher')
taxa <- filter(genera_dat, SampleSource=='S-01')
data <- filter(cfrip_data_biom, SampleSource=='S-01', RecallNo == 1 )

#  SAMPLE SIZE OF DATA
data %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       4
## 2 Within      5
## 3 Above       9
data %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        6
## 2 Class 1       5
## 3 Class 2-3     7
## Measures to cut by median:
measures <- c(alpha.measures, paste0('Genus_',taxa$Genus))

## Metrics that now include HEI Scores
which(colnames(cfrip_data) == "Lang")
## [1] 120
metrics <- colnames(cfrip_data)[c(120:190)] # Diet recall only  237:452, 


## Run Taxa analyses
i<-j<-1
md <- as.data.frame(matrix(nrow=length(measures)*length(metrics), ncol=18))
for(met in metrics){
  for(i in 1:length(measures)){
    md[j,1] <- 'S-01-r1'
    md[j,2] <- met
    md[j,3] <- m <- measures[i]
    md[j,4] <- median(data[, m])
    ## Create dummy code for above belowmedian of taxa
    data[,paste0('median_code_',m)] <- ifelse(data[, m] > md[j,4], 1, 0)
    ## Set up formula
    form <- as.formula(paste0(met,' ~ ', 'median_code_',m))
    ## T-test
    t.out <- tryCatch(t.test(form, data=data), error = function(e) NA)
    ## Compute cohen's d
    c.d <- cohen.d(form, data=data)
    ## Correlation (spearman)
    sp.cor <- cor(na.omit(data[, c(m,met)]),
                  method='spearman')
    sp.cor <- sp.cor[1,2]
    ## SAve results
    md[j,5:11] <- as.numeric(unlist(t.out)[1:7])
    md[j,12:14] <- as.numeric(unlist(c.d)[3:5])
    md[j,15] <- sp.cor
    md[j,16] <- sp.cor**2
    # get sample sizes used for above vs. below
    temp.dat <- na.omit(data[, c(m,met, paste0('median_code_',m))])
    n.below <- nrow(temp.dat[ temp.dat[,3] == 0, ])
    n.above <- nrow(temp.dat[ temp.dat[,3] == 1, ])
    md[j,17] <- n.below
    md[j,18] <- n.above
    ## update iterator
    j <- j + 1
  } ## Loop around median measures
} ## end loop around metrics of dietary factos
# # naming the columns
colnames(md) <- c('SampleSource', 'Diet_Factor', 'Alpha_Taxa', 'Median', 't', 'df', 'pvalue', 't-LL', 't-UL', 'Mean Below Median', 'Mean Above Median', 'Cohen d', 'd LL', 'd UL', 'rs', 'r2', 'n.below', 'n.above')


## Now for the subset of those with
md.s <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-01-r1')
kable(md.s, format='html',digits=3,row.names = F)%>%
  kable_styling(full_width = T)
SampleSource Diet_Factor Alpha_Taxa Median t df pvalue t-LL t-UL Mean Below Median Mean Above Median Cohen d d LL d UL rs r2 n.below n.above
S-01-r1 KCAL_ASA24 Genus_Peptoniphilus 21.5 2.850 9.772 0.018 200.646 1659.186 2529.077 1599.161 1.344 692.112 0.237 -0.768 0.589 9 9
S-01-r1 tfat Genus_Peptoniphilus 21.5 3.144 15.949 0.006 12.503 64.303 93.753 55.350 1.482 25.911 0.354 -0.780 0.608 9 9
S-01-r1 magn Genus_Peptoniphilus 21.5 2.775 9.455 0.021 27.029 256.165 335.686 194.089 1.308 108.223 0.207 -0.818 0.669 9 9
S-01-r1 phos Genus_Peptoniphilus 21.5 2.823 13.524 0.014 132.526 982.130 1613.951 1056.623 1.331 418.766 0.226 -0.818 0.669 9 9
S-01-r1 pota Genus_Peptoniphilus 21.5 2.717 11.695 0.019 194.696 1795.782 2770.942 1775.703 1.281 777.164 0.184 -0.800 0.639 9 9
S-01-r1 sodi Genus_Peptoniphilus 21.5 2.449 14.760 0.027 184.866 2694.809 4223.457 2783.620 1.154 1247.239 0.075 -0.739 0.546 9 9
S-01-r1 zinc Genus_Finegoldia 26.0 3.624 15.936 0.002 2.200 8.405 12.897 7.594 1.708 3.104 0.541 -0.794 0.630 9 9
S-01-r1 copp Genus_Peptoniphilus 21.5 3.290 9.354 0.009 0.201 1.071 1.406 0.770 1.551 0.410 0.411 -0.838 0.702 9 9
S-01-r1 S160 Genus_Peptoniphilus 21.5 2.555 14.930 0.022 1.190 13.207 17.026 9.828 1.204 5.977 0.118 -0.807 0.651 9 9
S-01-r1 mfat Genus_Peptoniphilus 21.5 3.343 15.057 0.004 5.359 24.204 33.875 19.094 1.576 9.381 0.432 -0.788 0.621 9 9
S-01-r1 M181 Genus_Peptoniphilus 21.5 3.350 14.964 0.004 5.071 22.827 31.750 17.800 1.579 8.834 0.435 -0.812 0.659 9 9
## Heatmap
dat <- filter(md, SampleSource == 'S-01-r1')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits=c(-1,1)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('ASA24_heatmap_S-01-r1.png', plot=p, width=9, heigh=30, units="in")

# Subset heatmap
dat <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-01-r1')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('ASA24_heatmap_S-01-r1_subset.png', plot=p, width=7, heigh=11, units="in")

Time point 2 analyses

alpha.measures <- c('Observed_OTUs', 'Shannon', 'Simpson', 'InvSimpson', 'Fisher')
taxa <- filter(genera_dat, SampleSource=='S-02')
data <- filter(cfrip_data_biom, SampleSource=='S-02', RecallNo == 2)

#  SAMPLE SIZE OF DATA
data %>%
  group_by(GWG_Cat) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   GWG_Cat     n
##   <ord>   <int>
## 1 Below       3
## 2 Within      4
## 3 Above       5
data %>%
  group_by(BMI_Class) %>%
  summarise(n = n())
## # A tibble: 3 x 2
##   BMI_Class     n
##   <ord>     <int>
## 1 Normal        3
## 2 Class 1       3
## 3 Class 2-3     6
## Measures to cut by median:
measures <- c(alpha.measures, paste0('Genus_',taxa$Genus))

## Run Taxa analyses
i<-1
for(met in metrics){
  for(i in 1:length(measures)){
    md[j,1] <- 'S-02-r2'
    md[j,2] <- met
    md[j,3] <- m <- measures[i]
    md[j,4] <- median(data[, m])
    ## Create dummy code for above belowmedian of taxa
    data[,paste0('median_code_',m)] <- ifelse(data[, m] > md[j,4], 1, 0)
    ## Set up formula
    form <- as.formula(paste0(met,' ~ ', 'median_code_',m))
    ## T-test
    t.out <- tryCatch(t.test(form, data=data), error = function(e) NA)
    ## Compute cohen's d
    c.d <- cohen.d(form, data=data)
    ## Correlation (spearman)
    sp.cor <- cor(na.omit(data[, c(m,met)]),
                  method='spearman')
    sp.cor <- sp.cor[1,2]
    ## SAve results
    md[j,5:11] <- as.numeric(unlist(t.out)[1:7])
    md[j,12:14] <- as.numeric(unlist(c.d)[3:5])
    md[j,15] <- sp.cor
    md[j,16] <- sp.cor**2
    # get sample sizes used for above vs. below
    temp.dat <- na.omit(data[, c(m,met, paste0('median_code_',m))])
    n.below <- nrow(temp.dat[ temp.dat[,3] == 0, ])
    n.above <- nrow(temp.dat[ temp.dat[,3] == 1, ])
    md[j,17] <- n.below
    md[j,18] <- n.above
    ## update iterator
    j <- j + 1
  } ## Loop around median measures
} ## end loop around metrics of dietary factos

## Now for the subset of those with
md.s <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-02-r2')
kable(md.s, format='html',digits=3,row.names = F)%>%
  kable_styling(full_width = T)
SampleSource Diet_Factor Alpha_Taxa Median t df pvalue t-LL t-UL Mean Below Median Mean Above Median Cohen d d LL d UL rs r2 n.below n.above
S-02-r2 tfat Genus_Lactobacillus 23.0 2.751 8.931 0.023 10.965 113.059 124.395 62.382 1.588 39.039 0.113 -0.819 0.671 6 6
S-02-r2 iron_ASA24 Genus_Ruminococcaceae 39.0 -2.833 6.911 0.026 -14.423 -1.281 15.952 23.804 -1.636 4.801 -3.122 0.765 0.585 6 6
S-02-r2 sele Genus_Prevotella 81.5 -3.018 8.148 0.016 -177.487 -24.011 96.170 196.919 -1.742 57.820 -3.253 0.823 0.678 6 6
S-02-r2 niac Genus_Dialister 52.0 -2.955 5.579 0.028 -22.904 -1.946 24.721 37.146 -1.706 7.282 -3.209 0.723 0.522 6 6
S-02-r2 fola Genus_Finegoldia 30.0 -2.952 9.085 0.016 -486.508 -64.733 392.626 668.247 -1.705 161.698 -3.206 0.737 0.543 6 6
S-02-r2 S160 Genus_Lactobacillus 23.0 2.730 9.796 0.022 1.909 19.140 21.428 10.904 1.576 6.678 0.103 -0.746 0.556 6 6
S-02-r2 mfat Genus_Lactobacillus 23.0 3.629 8.819 0.006 10.250 44.461 47.660 20.305 2.095 13.056 0.494 -0.896 0.804 6 6
S-02-r2 M181 Genus_Lactobacillus 23.0 3.822 9.022 0.004 10.626 41.418 44.808 18.786 2.207 11.792 0.575 -0.868 0.754 6 6
S-02-r2 M201 Genus_Lactobacillus 23.0 2.936 7.063 0.022 0.063 0.578 0.525 0.205 1.695 0.189 0.195 -0.760 0.577 6 6
## Heatmap
dat <- filter(md, SampleSource == 'S-02-r2')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits=c(-1,1)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')

p

#ggsave('ASA24_heatmap_S-02-r2.png', plot=p, width=9, heigh=30, units="in")

# Subset heatmap
dat <- filter(md, r2 > r.cut, pvalue < p.cut, SampleSource == 'S-02-r2')
dat$Alpha_Taxa <- factor(dat$Alpha_Taxa, levels=measures, ordered=T)
p <- ggplot(dat, aes(x = Alpha_Taxa, y = Diet_Factor)) +
  geom_tile(aes(fill = rs)) +
  scale_fill_gradient2(name="Spearman's r", limits = c(-1,1)) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=-.05),
        legend.position = 'bottom')
p

#ggsave('ASA24_heatmap_S-02-r2_subset.png', plot=p, width=9, heigh=15, units="in")

Scatterplots of Percent Abundance by SOFAAS

bacteria: Bacteroides, Finegoldia, Peptoniphilus

# Calc %abundance
cfrip_data_biom$sums <- rowSums(cfrip_data_biom[, grep('Genus', colnames(cfrip_data_biom))])
k <- length(grep('Genus', colnames(cfrip_data_biom)))
knames <- colnames(cfrip_data_biom)
cfrip_data_biom <- cbind(cfrip_data_biom, 
                         matrix(nrow=nrow(cfrip_data_biom),
                                ncol=k))

lnames <- paste0('perc_',colnames(cfrip_data_biom)[grep('Genus', colnames(cfrip_data_biom))])
colnames(cfrip_data_biom) <- c(knames, lnames)


for(m in 1:length(lnames)){
  cfrip_data_biom[, lnames[m]] <- cfrip_data_biom[, knames[m+471]]/cfrip_data_biom[, 'sums']
}# End loop

data <- filter(cfrip_data_biom, SampleSource=='S-01')

# SOFAAS by Bacteroides
subdat <- data[,c('HEIX12_SOFAAS','perc_Genus_Bacteroides','GWG_Cat')]
subdat <- na.omit(subdat)
p1 <- ggplot(subdat, aes(x=HEIX12_SOFAAS, 
                   y=perc_Genus_Bacteroides, 
                   color=GWG_Cat,
                   shape=GWG_Cat)) +
  geom_point(aes(size=2)) +
  lims(x=c(8,20), y=c(0,.5)) +
  labs(x='HEI SOFAAS', y='%Abundance Bacteroides') +
  scale_color_brewer(name='GWG Category',palette="Dark2")+
  scale_shape_discrete(name='GWG Category') +
  theme(legend.position="bottom") + 
  guides(size=FALSE)
p1

# SOFAAS by Finegoldia
subdat <- data[,c('HEIX12_SOFAAS','perc_Genus_Finegoldia','GWG_Cat')]
subdat <- na.omit(subdat)
p2 <- ggplot(subdat, aes(x=HEIX12_SOFAAS, 
                   y=perc_Genus_Finegoldia, 
                   color=GWG_Cat,
                   shape=GWG_Cat)) +
  geom_point(aes(size=2)) +
  lims(x=c(8,20), y=c(0,.5)) +
  labs(x='HEI SOFAAS', y='%Abundance Finegoldia') +
  scale_color_brewer(name='GWG Category',palette="Dark2")+
  scale_shape_discrete(name='GWG Category') +
  theme(legend.position="bottom") + 
  guides(size=FALSE)
p2

# SOFAAS by Peptoniphilus
subdat <- data[,c('HEIX12_SOFAAS','perc_Genus_Peptoniphilus','GWG_Cat')]
subdat <- na.omit(subdat)
p3 <- ggplot(subdat, aes(x=HEIX12_SOFAAS, 
                   y=perc_Genus_Peptoniphilus, 
                   color=GWG_Cat,
                   shape=GWG_Cat)) +
  geom_point(aes(size=2)) +
  lims(x=c(8,20), y=c(0,.5)) +
  labs(x='HEI SOFAAS', y='%Abundance Peptoniphilus') +
  scale_color_brewer(name='GWG Category',palette="Dark2")+
  scale_shape_discrete(name='GWG Category') +
  theme(legend.position="bottom") + 
  guides(size=FALSE)
p3

# ggsave('scatter_sofaas_Bacteroides.pdf', plot=p1)
# ggsave('scatter_sofaas_Finegoldia.pdf', plot=p2)
# ggsave('scatter_sofaas_Peptoniphilus.pdf', plot=p3)
# # 
# ggsave('scatter_sofaas_Bacteroides.png', plot=p1)
# ggsave('scatter_sofaas_Finegoldia.png', plot=p2)
# ggsave('scatter_sofaas_Peptoniphilus.png', plot=p3)